﻿using System.Text.RegularExpressions;

namespace wojilu
{
    /// <summary>
    /// Helpers for turning HTML fragments into plain text.
    /// </summary>
    public static class HTML
    {
        #region Strip HTML tags

        // Anything from '<' up to the next '>' (may span several lines).
        private const string TagPattern = "<[^>]*>";

        // A CR or LF together with the run of whitespace that follows it.
        private const string LineBreakRunPattern = @"([\r\n])[\s]+";

        private const string NonBreakingSpaceEntity = "&nbsp;";
        private const string SingleSpace = " ";

        /// <summary>
        /// Removes HTML tags from the given text.
        /// </summary>
        /// <remarks>
        /// Three steps are applied in order: every <c>&lt;...&gt;</c> tag is deleted,
        /// each CR/LF that is followed by at least one whitespace character is
        /// replaced (together with that whitespace) by a single space, and every
        /// <c>&amp;nbsp;</c> entity is replaced by a space. Only the tags themselves
        /// are removed: text between <c>&lt;script&gt;</c> or <c>&lt;style&gt;</c>
        /// tags is kept, and entities other than <c>&amp;nbsp;</c> are left as they are.
        /// </remarks>
        /// <param name="Htmlstring">Text that may contain HTML markup; may be null.</param>
        /// <returns>
        /// The text with tags stripped, or <see cref="string.Empty"/> when the input
        /// is null or empty.
        /// </returns>
        public static string NoHTML(this string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            string withoutTags = Regex.Replace(Htmlstring, TagPattern, string.Empty);
            string collapsed = Regex.Replace(withoutTags, LineBreakRunPattern, SingleSpace);

            return collapsed.Replace(NonBreakingSpaceEntity, SingleSpace);
        }

        #endregion
    }
}
